First, we load the required packages.
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
from io import StringIO
import requests
from sklearn.preprocessing import MinMaxScaler
from keras_anomaly_detection.library.plot_utils import visualize_reconstruction_error
from keras_anomaly_detection.library.convolutional import Conv1DAutoEncoder
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (confusion_matrix, precision_recall_curve, auc,
roc_curve, recall_score, classification_report, f1_score,
precision_recall_fscore_support)
from random import random
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import json
import sys
import warnings
# Silence every warning category, but only when no warning options were given
# on the command line, so an explicit -W policy is left untouched.
if not sys.warnoptions:
    warnings.simplefilter(action="ignore")
The data can be read in two different ways.
To read the data from BigQuery, you need the JSON file containing the authentication key in order to be able to execute queries involving that data. In this case I am going to use the file "bigquery-test-228612-bbb8922fe944.json", located in my Documents folder.
# Data source 1: query up to 1000 rows of the ECG table from BigQuery.
# Authentication uses the service-account key file at the hard-coded path below.
query = """
SELECT *
FROM `bigquery-test-228612.ECGData.Test`
LIMIT 1000
"""
df = pd.read_gbq(
    query,
    project_id='bigquery-test-228612',
    private_key='C:/Users/DELL/Documents/bigquery-test-228612-9d37266d3392.json',
    dialect='standard',
)

# Drop the first returned row, then relabel the columns as 1.0, 2.0, ..., n.
df = df.iloc[1:, :]
n_columns = len(df.columns)
Col = np.linspace(1, n_columns, n_columns)
df.columns = Col
df.head()
Then, we read the ECG data
# Data source 2: download the demo ECG CSV (it has no header row) from GitHub.
url='https://raw.githubusercontent.com/chen0040/keras-anomaly-detection/master/demo/ecg_demo/data/ecg_discord_test.csv'
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV data.
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.text
df = pd.read_csv(StringIO(s), header=None)
df.head()
Now we perform an exploratory data analysis (EDA) on the data.
# Summary statistics per series (after transposing, each column is one series).
Tdf = df.T
Tdf.describe()

# The number and indexes of the series you want to plot
Tdf = df.iloc[15:17]
Tdf = Tdf.T
Tdf.plot()

# Pairwise scatter plots and the correlation heatmap between series.
sns.pairplot(df.T)
corr = (np.transpose(df)).corr()
f, ax = plt.subplots(figsize=(14, 7))
hm = sns.heatmap(corr, annot=True, ax=ax, cmap="coolwarm", fmt='.2f',
                 linewidths=.05)
f.subplots_adjust(top=0.93)
t = f.suptitle('Correlation Heatmap', fontsize=14)

# Boxplot of the first 10 series.
# The ID column is added to a copy of those rows only: writing it into `df`
# itself would turn it into an extra feature column for every later step that
# reads df.columns or df.values (series generation, scaling, model input).
boxplot_source = df.iloc[0:10, :].copy()
boxplot_source['ID'] = np.arange(1, len(boxplot_source.index) + 1, dtype=float)
Data = pd.melt(boxplot_source, id_vars="ID", var_name="Series", value_name='Value')
plt.figure(figsize=(10, 10))
sns.boxplot(x="ID", y="Value", data=Data)
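After the EDA, we add some artificial inliers and outliers. Given the distribution of the data and its maximum and minimum values, each new inlier value is a continuous random number between 2 and 7; each new outlier value is drawn the same way and then multiplied by 5, so it lies between 10 and 35.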
def _random_series(n_series, n_points, scale=1):
    """Return ``n_series`` rows of ``n_points`` values, each ``(2 + 5*u) * scale``.

    ``u`` is one ``random()`` draw per value, taken row by row, so with
    ``scale=1`` every value lies in [2, 7).
    """
    return [[(2 + (random() * 5)) * scale for _ in range(n_points)]
            for _ in range(n_series)]


def _append_series(frame, rows):
    """Return ``frame`` with ``rows`` appended under the same columns.

    The result gets a fresh 0..n-1 index. ``pd.concat`` is used because
    ``DataFrame.append`` no longer exists in pandas 2.x. With no rows to add,
    ``frame`` is returned unchanged.
    """
    if not rows:
        return frame
    new_rows = pd.DataFrame(rows, columns=list(frame.columns))
    return pd.concat([frame, new_rows], ignore_index=True)


# Generate normal series
NormalSeriesNew = 7  # How many new normal series you want
df = _append_series(df, _random_series(NormalSeriesNew, len(df.columns)))

# Generate abnormal series (same draw as the normal ones, multiplied by 5)
AbnormalSeriesNew = 5  # How many new abnormal series you want
df = _append_series(df, _random_series(AbnormalSeriesNew, len(df.columns), scale=5))
df
With the complete data, we create the labels for the outliers (1) and inliers (0). A series is labeled as an outlier when its mean value is 10 or more.
# Label every series by its mean value: 0 (inlier) when the mean is below 10,
# 1 (outlier) otherwise. After transposing, column i of Tdf is series i.
Tdf = df.T
labels = [0 if Tdf[i].mean() < 10 else 1 for i in range(len(df.index))]
Now, with the data and labels ready, we build the neural network model for anomaly detection.
from keras.callbacks import TensorBoard
from keras.layers import Conv1D, GlobalMaxPool1D, Dense, Flatten
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from numpy.random import seed
# TensorBoard callback: writes the training logs (including the graph) to ./logs.
tensorboard = TensorBoard(
    log_dir='./logs',
    histogram_freq=0,
    write_graph=True,
    write_images=False,
)

# Min-max scale the data column by column (MinMaxScaler with default settings).
scaler = MinMaxScaler()
df_np_data = scaler.fit_transform(df.values)

# Conv1D -> global max pooling -> dense layer with one output per input step.
n_steps = df_np_data.shape[1]
model = Sequential([
    Conv1D(filters=256, kernel_size=5, padding='same', activation='relu',
           input_shape=(n_steps, 1)),
    GlobalMaxPool1D(),
    Dense(units=n_steps, activation='linear'),
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# The network is trained to reproduce its own input: the target is the scaled
# data and the input is the same data with a trailing channel axis.
Xin = df_np_data[:, :, np.newaxis]
Yin = df_np_data
seed(999)
model.fit(x=Xin, y=Yin, batch_size=8, epochs=100, verbose=2,
          validation_split=0.1, callbacks=[tensorboard])
To view the results in TensorBoard, open the console, type: tensorboard --logdir C:\Users\DELL\Documents\logs --host=127.0.0.1 and press Enter.
After you press Enter, the console will show an address. Open your browser and paste that address; it will take you to the TensorBoard panel.
The argument "C:\Users\DELL\Documents\logs" refers to your log directory.
It should show you something like this:
from IPython.display import Image
# Show the TensorBoard screenshots stored at hard-coded local paths.
i = Image(filename='C:/Users/DELL/Pictures/TensorBoard/Conv1D/1.PNG')
i
i = Image(filename='C:/Users/DELL/Pictures/TensorBoard/Conv1D/2.PNG')
i
# A single assignment is enough here (this line used to read `i = i = ...`).
i = Image(filename='C:/Users/DELL/Pictures/TensorBoard/Conv1D/3.PNG')
i
Now the anomaly detection
## For each observation, decide whether or not it is an anomaly
# Share of observations placed below the threshold (treated as normal).
estimated_negative_sample_ratio = 0.9

# Reconstruct every series with the trained model; the Euclidean distance
# between a series and its reconstruction is that series' anomaly score.
input_timeseries_dataset = np.expand_dims(df_np_data, axis=2)
target_timeseries_dataset = model.predict(x=input_timeseries_dataset)
dist = np.linalg.norm(df_np_data - target_timeseries_dataset, axis=-1)

# np.sort returns a sorted copy, so `dist` keeps its per-observation order and
# does not need to be computed a second time.
scores = np.sort(dist)
# Clamp the index so a ratio of 1.0 selects the largest score instead of
# indexing one past the end.
cut_point = min(int(estimated_negative_sample_ratio * len(scores)), len(scores) - 1)
threshold = scores[cut_point]

reconstruction_error = []
# `error` (not `dist`) is the loop variable so the score array is not overwritten.
for idx, error in enumerate(dist):
    is_anomaly = error >= threshold
    print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(error) + ')')
    reconstruction_error.append(error)

# Visualize the anomalies
visualize_reconstruction_error(reconstruction_error, threshold)
Now the ROC Curve and the confusion matrix
# Pair every true label with the reconstruction error of the same observation.
TClass = pd.DataFrame(labels, columns=["Class"])
TClass = TClass.assign(Rec_Error=pd.Series(reconstruction_error).values)
TClass

## ROC CURVE
# The reconstruction error is used as the score for the positive (outlier) class.
fpr, tpr, thresholds = roc_curve(TClass["Class"], TClass["Rec_Error"])
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label='AUC = %0.4f'% roc_auc)
plt.legend(loc='lower right')
# Dashed diagonal drawn for reference.
plt.plot([0,1],[0,1],'r--')
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([-0.01, 1.01])
plt.ylim([0, 1.001])
plt.show()
# Quality metrics and graphs for the anomaly detection model
precision, recall, th = precision_recall_curve(TClass.Class, TClass.Rec_Error)

plt.plot(recall, precision, 'b', label='Precision-Recall curve')
plt.title('Recall vs Precision')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.show()

# precision and recall each have one more entry than th: entry i belongs to
# th[i], and the final entry (precision 1, recall 0) has no threshold. The
# last entry is therefore the one to drop, not the first.
plt.plot(th, precision[:-1], 'b', label='Threshold-Precision curve')
plt.title('Precision for different threshold values')
plt.xlabel('Threshold')
plt.ylabel('Precision')
plt.show()

plt.plot(th, recall[:-1], 'b', label='Threshold-Recall curve')
plt.title('Recall for different threshold values')
plt.xlabel('Reconstruction error')
plt.ylabel('Recall')
plt.show()
## CONFUSION MATRIX
LABELS = ["Normal", "Outlier"]

# An observation is predicted as an outlier (1) when its reconstruction error
# reaches the threshold, and as normal (0) otherwise.
y_pred = (TClass.Rec_Error.values >= threshold).astype(int).tolist()
conf_matrix = confusion_matrix(TClass.Class, y_pred)

plt.figure(figsize=(12, 12))
sns.heatmap(conf_matrix, annot=True, fmt="d",
            xticklabels=LABELS, yticklabels=LABELS)
plt.title("Confusion matrix")
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.show()
The model we trained needs to be rebuilt with tf.keras (the Keras bundled with TensorFlow) using this same configuration:
# Same architecture as before, rebuilt from tensorflow.keras classes:
# Conv1D -> global max pooling -> dense layer with one output per input step.
series_length = df_np_data.shape[1]
model = keras.models.Sequential([
    keras.layers.Conv1D(filters=256, kernel_size=5, padding='same',
                        activation='relu', input_shape=(series_length, 1)),
    keras.layers.GlobalMaxPool1D(),
    keras.layers.Dense(units=series_length, activation='linear'),
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
# Trained on the same input/target arrays as the first model, without the
# TensorBoard callback.
model.fit(x=Xin, y=Yin, batch_size=8, epochs=100, verbose=2,
          validation_split=0.1)
Once we have successfully trained and validated a neural network with good performance, we can deploy the model to Google Cloud so that predictions can be requested from any computer. To accomplish this, we must follow these steps:
# Name of the model's first input; it is reused later as the key of the
# serving-input dictionary and of the JSON prediction request.
model_input_name = model.input_names[0]
# Convert the Keras model to an Estimator, using ./estimator_model/ as its model directory.
estimator_model = keras.estimator.model_to_estimator(keras_model=model, model_dir="./estimator_model/")
This code will create a folder named Keras inside the specified directory. We need to move the files out of that folder and into the root of the specified directory.
def serving_input_receiver_fn():
    """Build the serving input receiver used when exporting the estimator.

    Returns:
        A ``tf.estimator.export.ServingInputReceiver`` whose single float32
        placeholder is keyed by the model's input name. The same dictionary is
        passed as both the features and the receiver tensors.
    """
    # The series length is taken from the training data instead of being
    # hard-coded to 210, so the placeholder always matches the input_shape
    # the model was built with.
    Series = {
        model_input_name: tf.placeholder(
            dtype=tf.float32,
            name='Time_Series',
            shape=[None, df_np_data.shape[1], 1],
        ),
    }
    return tf.estimator.export.ServingInputReceiver(Series, Series)


# Export the estimator as a SavedModel under the current directory.
export_path = estimator_model.export_savedmodel('./', serving_input_receiver_fn=serving_input_receiver_fn)
This will create a folder with a file called saved_model.pb, which contains the model, and a folder called variables, which contains the weights of the trained network.
# Load and show five screenshots from hard-coded local paths, one after another.
# NOTE(review): judging by the file names they illustrate the exported folder,
# the bucket, the model and its version — confirm against the images.
i = Image(filename='C:/Users/DELL/Pictures/Folder.PNG')
i
i = Image(filename='C:/Users/DELL/Pictures/Bucket.PNG')
i
i = Image(filename='C:/Users/DELL/Pictures/Model.PNG')
i
i = Image(filename='C:/Users/DELL/Pictures/Version.PNG')
i
i = Image(filename='C:/Users/DELL/Pictures/VersionC.PNG')
i
# Take observation 24 of the scaled data, turn it into a column (a nested list
# with one value per row) and store it under the model's input name in a JSON
# request file.
Dat = df_np_data[24:25].T.tolist()
Predictions = {model_input_name: Dat}
json_filename = 'Serie24_Def_1.json'
with open(json_filename, 'w') as outfile:
    outfile.write(json.dumps(Predictions))
We define the model name, the version, and the input file (this file needs to be uploaded to the shell first).
# NOTE(review): these three lines look like shell variable assignments meant
# for the cloud shell rather than Python statements — confirm before running
# them in a Python cell.
MODEL_NAME=AnomalyDetection
INPUT_DATA_FILE="Serie24_Def_1.json"
VERSION_NAME=RealAD
# Load and show the screenshot stored at this hard-coded local path.
i = Image(filename='C:/Users/DELL/Pictures/Shell.PNG')
i
With the parameters defined we request some predictions with the code
# Load and show the screenshot stored at this hard-coded local path.
i = Image(filename='C:/Users/DELL/Pictures/Pred.PNG')
i
After running the code, the requested predictions appear on screen.
# Load and show the screenshot stored at this hard-coded local path.
i = Image(filename='C:/Users/DELL/Pictures/Predd.PNG')
i